/*******************************************************************************
SIT-D replication source code.

	[Script No: 2]
	[Last updated: June 4th, 2024]
	
	This script processes the consolidated administrative data to create 
	the final "master" data for administrative analysis. The script has 
	four parts:
	
		(1) Restrict the data to the selected specification and observation period
		
		(2) Build the "pre-training" data
			The outcomes in this part are considered baseline outcomes
			
		(3) Build the "post-training" data 
			These are the data points for the program effect analysis
		
		(4) Simplify the data
			Our Sit-D pipeline was built to be comprehensive, but only
			a handful of outcomes are included in the analysis.
			This section simplifies the data set for shareability purposes.
	

*******************************************************************************/

	
* (2) Prepare the data set for analysis ----------------------------------------

	* Open the consolidated administrative data referenced by the output1 global
	use $output1, clear
	
	/* 	Expose the timing variables of the chosen specification (the "spec"
		global) under generic names, so the rest of the script does not need
		to know which specification is active. Creation order is kept as
		pre, post, training, training_started, training_ended. */
	foreach stem in pre post training {
		generate `stem' = `stem'_${spec}
	}
	foreach stem in training_started training_ended {
		generate `stem' = ${spec}_`stem'
	}
	
	/* 	Sample restriction: keep the rows with training equal to 0 whose
		year_month falls inside the window given by the prestart and postend
		globals (both bounds inclusive, as inrange() is). */
	keep if inrange(year_month, $prestart, $postend) & (training == 0)
	
	* Items that enter the PRS calculations
	global prs_items 	warrant veh_rec firearm traf_stop driv_stop cont anov ///
					haz_cit nonhaz_cit curf_vio cta park_cit isrs
	
	/* 	Legend, as far as it is documented:
			warrant    - warrants
			veh_rec    - vehicle recoveries
			firearm    - firearm violations
			traf_stop  - traffic stops
			driv_stop  - driver stops
			anov       - ANOVs
			haz_cit    - hazardous citations
			nonhaz_cit - non-hazardous citations
			curf_vio   - curfew violations
			park_cit   - parking citations
			isrs       - ISRs
		NOTE(review): cont and cta are in the list but were never described
		in this file - confirm their meaning against the codebook. */
	
	
	
* (3) Variable creations : PRE SITD --------------------------------------------

	/* 	The subscript called below creates the variables that are fixed 
		throughout the officer timeline, including:
		- Baseline [2018-01, 2021-01] for the main outcomes
		- Endline assessment participation 
	*/
	do $pre_script

		
		
* (4) Variable creations : POST SITD -------------------------------------------
	
	* IMPORTANT: from this point on only the rows flagged post == 1 remain
	keep if post == 1
	
	/* 	Months elapsed since the end of training.
	
		"month" gives the position of the observation after Sit-D training:
		1 is the first month following the month in which the officer
		finished the training, 2 is the month after that, and so on.
		
		The arithmetic treats year_month and training_ended as YYYYMM
		integers (as the division by 100 implies): the year gap is converted
		to months and the difference of the two month-of-year parts is added.
		Example: 202203 versus 202111 gives 12 * 1 + 3 - 11 = 4. */
	generate year_dif = floor(year_month / 100) - floor(training_ended / 100)
	generate month    = 12 * year_dif + mod(year_month, 100) - mod(training_ended, 100)
	
	/*	The post-training subscript handles a number of operations:
		- Creates additional variables for the analysis (e.g. group identifier)
		- Creates the month bins for the breakdown and multiple regressions
		- Creates the combined level 1-2 TRR 
		- Makes the index calculation
	*/
	do $post_script
		
		
		
* (5) Final data cleaning ------------------------------------------------------

	/* 	Individual identifier: i equals 1 on one row per employee_id and 0 on
		the others. Only use it for variables that are constant throughout
		the post period.
		
		Rows are ordered by year_month within officer before flagging. Sorting
		on employee_id alone leaves the within-officer order to the
		tie-breaking of sort, so the flagged row and the row order of the
		saved file could change from one run to the next. With the secondary
		key the flag always lands on the earliest remaining month (assuming
		one row per officer-month - TODO confirm). */
	bys employee_id (year_month): gen i = _n == 1

	* Simplify the data set to the minimum variables for replication
	keep 		employee_id ///
				treatment strata ///
				year_month month missing no_endline i ///
				l12_trr_C arrest_con_cop days_iod ttt_score_cut_Am ///
				l23_trr_C trr_C ///
				l12_trr_s_i_C l12_trr_sa_injury_C ///
				l12_trr_s_h_C l12_trr_s_hos_i_ix_C ///
				award total_w_exo_accusations ///
				prs_score_w_arrs_cut_Am   ///
				prs_* ///
				i_complaint_pri_cut_Am i_complaint_sec_cut_Am i_complaints_cut_Am ///
				months_1_4 months_5_8 months_9_12 ///
				months_1_3 months_4_6 months_7_9 months_10_12 ///
				months_1_4_t months_5_8_t months_9_12_t ///
				months_1_3_t months_4_6_t months_7_9_t months_10_12_t ///
				months_1_12 months_1_12_t ///
				pre_arrest_con_cop pre_arrest_non_con_cop ///
				pre_prs_score_w_arrs pre_days_iod ///
				age experience ///
				dmale dblack dhispanic p_black_or_hispanic p_black p_hispanic dwhite dother ///
				pre_trr_C ///
				pre_l12_rc_trr_C pre_trr_s_i_C pre_trr_sa_injury_C ///
				pre_trr_officer_i_C pre_ttt_score pre_total_w_exo_accusations ///
				pre_ttt_score_e pre_prs_score_w_arrs_e ///
				pre_trr_s_h_C pre_award ///
				unit_OSS_rand unit_watch_OSS_rand u uw ///
				trr_nf_p2020_1_C trr_nf_p2020_2_C arrest_non_con_cop ratio ///
				all_crime_monthly_1K violent_crime_monthly_1K ///
				arr_* training_started training_ended

				
				
* (6) Save data ----------------------------------------------------------------
	save ${interim_data}, replace
	